{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "多组单智能体.\n",
    "\n",
    "critic共享state,但是actor不共享state."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEYCAYAAABlUvL1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAlw0lEQVR4nO3df3RTZZ4/8HfStKG0JLGFJu3QYp0pllpAoFAysDKzVKpTHRV0V05F5DByqAXBznCwuwirZ6Uc2D3u4iCM7qy4BwFlv+sgPyrbLcoPCQXKdCxFC4yMLT/SArVJW2jaJp/vH9g7BFBJfz2JvF/n3HPofZ4kn/vQ++7NfW5ydSIiICLqY3rVBRDR7YnhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESigLnzVr1uDOO+9Ev379kJmZiUOHDqkqhYgUUBI+7733HgoKCrBs2TIcPXoUI0eORHZ2Nurr61WUQ0QK6FR8sDQzMxNjx47Fb3/7WwCAz+dDYmIi5s+fjxdffLGvyyEiBQx9/YJtbW0oLy9HYWGhtk6v1yMrKwsOh+Omj/F4PPB4PNrPPp8PDQ0NiI2NhU6n6/WaiejWiAiampqQkJAAvf6731j1efhcvHgRXq8XVqvVb73VasUXX3xx08cUFRXh5Zdf7ovyiKgH1NbWYvDgwd/Zp8/DpysKCwtRUFCg/exyuZCUlITa2lqYTCaFlRHRtdxuNxITEzFgwIDv7dvn4TNw4ECEhYWhrq7Ob31dXR1sNttNH2M0GmE0Gm9YbzKZGD5EQehWTof0+WxXREQExowZg9LSUm2dz+dDaWkp7HZ7X5dDRIooedtVUFCAmTNnIiMjA+PGjcO//du/oaWlBbNmzVJRDhEpoCR8/v7v/x4XLlzA0qVL4XQ6ce+99+Kjjz664ST07UhE0NraivoL9YAAMTExiI6O5qwe/eAouc6nu9xuN8xmM1wu1w/qnM+V1ivYd2gf3it/D6c7TgMArHorpt4zFdkTsjEgegBDiIJaIPsmwydItFxuwar/XoV9l/ch7I4w6PRXQ0ZE4HV7kdqRipefeBmxMby2iYJXIPsmP1gaBHw+H/5j63/gU92nMMQatOABrs4aGMwGnDCfwMotK+H1ehVWStRzGD5B4Oz5s/jfc/8LnfHbj2j0Bj2Oth9FRVUFQvBglegGDJ8gsGP/DnjiPN/7dko3SIf//vS/+6gqot7F8AkCjc2N0IXfwkVZYXq4rrj6oCKi3sfwCQJRkVGQju9/KyVeQZQxqg8qIup9DJ8gkPPTHITXh3/vuRzvJS8eG/9YH1VF1LsYPkHgzqQ7MSl2EqTt28NHvIJ75B6MHTGWU+30g8DwCQJ6vR5zH52Lez33osPd4XcEJCLoaOnA4EuDsfjxxQgPD1dYKVHP4UWGQaSpqQnF+4vxP8f/B84wJwDA0mFBTnIOHvvZY7zAkIIer3AOYSICd5MbtWdqIRDY4mwYGDuQoUMhIZB9MyS+TOx2otPpYDaZYU4zqy6FqFfxnA8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJQIOn7179+Lhhx9GQkICdDod/vCHP/i1iwiWLl2K+Ph4REZGIisrCydPnvTr09DQgNzcXJhMJlgsFsyePRvNzc3d2hAiCi0Bh09LSwtGjhyJNWvW3LR95cqVWL16NdatW4eysjJERUUhOzsbra2tWp/c3FxUVVWhpKQE27dvx969ezFnzpyubwURhR7pBgDywQcfaD/7fD6x2WyyatUqbV1jY6MYjUbZtGmTiIgcP35cAMjhw4e1PsXFxaLT6eTs2bO39Loul0sAiMvl6k75RNTDAtk3e/Scz+nTp+F0OpGVlaWtM5vNyMzMhMPhAAA4HA5YLBZkZGRofbKysqDX61FWVnbT5/V4PHC73X4LEYW2Hg0fp/PqF2Bdf891q9WqtTmdTsTFxfm1GwwGxMTEaH2uV1RUBLPZrC2JiYk9WTYRKRASs12FhYVwuVzaUltbq7okIuqmHg0fm80GAKirq/NbX1dXp7XZbDbU19f7tXd0dKChoUHrcz2j0QiTyeS3EFFo69HwSU5Ohs1mQ2lpqbbO7XajrKwMdrsdAGC329HY2Ijy8nKtz+7du+Hz+ZCZmdmT5RBREAv4a1Sbm5tx6tQp7efTp0+joqICMTExSEpKwsKFC/HP//zPSElJQXJyMl566SUkJCTg0UcfBQAMGzYMDzzwAJ599lmsW7cO7e3tmDdvHp588kkkJCT02IYRUZALdCrt448/FgA3LDNnzhSRq9PtL730klitVjEajTJ58mSprq72e45Lly7J9OnTJTo6Wkwmk8yaNUuamppuuQZOtRMFp0D2Td69goh6TCD7ZkjMdhHRDw/Dh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEiJgMKnqKgIY8eOxYABAxAXF4dHH30U1dXVfn1aW1uRn5+P2NhYREdHY9q0aTfcPrmmpgY5OTno378/4uLisGjRInR0dHR/a4goZAQUPnv27EF+fj4OHjyIkpIStLe3Y8qUKWhpadH6vPDCC9i2bRu2bNmCPXv24Ny5c5g6darW7vV6kZOTg7a2Nhw4cADvvPMO1q9fj6VLl/bcVhFR0OvWTQMvXLiAuLg47NmzB/fddx9cLhcGDRqEjRs34vHHHwcAfPHFFxg2bBgcDgfGjx+P4uJiPPTQQzh37hysVisAYN26dVi8eDEuXLiAiIiI731d3jTwh0FEcPnyZbS0tKCmpgZnzpzxazebzbjnnnsQEREBs9kMANDpdCpKpVsUyL4Z8L3ar+VyuQAAMTExAIDy8nK0t7cjKytL65OamoqkpCQtfBwOB4YPH64FDwBkZ2cjLy8PVVVVGDVq1A2v4/F44PF4/DaQQk/n37mGhgYcPnwYpaWlOHHiBOrq6tDe3o729na//mFhYTAajYiOjsbQoUMxadIkTJgwAfHx8dDr9QyiENfl8PH5fFi4cCEmTJiA9PR0AIDT6URERAQsFotfX6vVCqfTqfW5Nng62zvbbqaoqAgvv/xyV0slxUQEXq8XFRUV2LRpE44ePYqvv/4aev3Vd/2dIRIeHn7DY9vb29HQ0ICDBw/iwIEDiIyMxD333INp06bhZz/7Gfr168cQClFdDp/8/HwcO3YM+/fv78l6bqqwsBAFBQXaz263G4mJib3+utR9IoLKykq89dZbOHjwILxeL3Q6HcLCwm75OTrDJSwsDG1tbTh69CjKy8uRmpqKZ555BllZWQE9HwWHLoXPvHnzsH37duzduxeDBw/W1ttsNrS1taGxsdHv6Keurg42m03rc+jQIb/n65wN6+xzPaPRCKPR2JVSSRERQUtLCzZu3IgNGzagubkZOp2uR45SOp+nuroaS5Yswd69e/H8888jLi6OR0EhJKDZLhHBvHnz8MEHH2D37t1ITk72ax8zZgzCw8NRWlqqrauurkZNTQ3sdjsAwG63o7KyEvX19VqfkpISmEwmpKWldWdbKEiICM6cOYOFCxdi3bp1aGlp6ZVQ0Ol08Pl8KC4uxqxZs3D06FF4vd4efx3qHQHNdj333HPYuHEjtm7dirvvvltbbzabERkZCQDIy8vDzp07sX79ephMJsyfPx8AcODAAQBXp9rvvfdeJCQkYOXKlXA6nZgxYwZ+9atfYfny5bdUB2e7gpeI4NChQ/iHf/gHfP311312JCIiCA8Px/PPP48nn3ySb8MUCWTfDCh8vu0X6e2338YzzzwD4OpFhr/+9a+xadMmeDweZGdn44033vB7S/XVV18hLy8Pn3zyCaKiojBz5kysWLECBsOtvQtk+AQnEcHhw4fx4osvorGxsc/fAokIDAYDFixYwABSpNfCJ1gwfIKP6uC5to7OAJo+fbo2o0Z9I5B9k/8z1G0igkuXLqGoqEhp8ABXj847OjqwZs0aVFZWIgT/tt42GD7UbR0dHXj11Vfx1VdfBcVsk06nQ2trK/7xH/8RDQ0Nqsuhb8HwoW4REezYsQN79+4NiuDppNPpcO7cOaxZs4YzYEGK4UPdcu7cObz++utB+/Zm27ZtfXIhLAWO4UNdJiJ4//330dDQEFRHPZ10Oh28Xi/Wr1/v99lACg4MH+qyc+fOYevWrUEZPJ10Oh0+++wzOBwO1aXQdRg+1GUff/xxn15I2FUigg8//JDnfoIMw4e6pKOjA6WlpSFzIV95eTlnvoIMw4e65OzZszh+/HjQH/UAV996NTY24siRI6pLoWswfKhLKioqcOXKFdVl3LKwsDB8+umnQTsrdzti+FCXHDhw4JY/ixcsKioq0NraqroM+gbDhwImItpX6IYKnU6Hy5cv8y4pQYThQwFzuVw33DIpFHz99dc4ceKE6jLoGwwfClhHRwdaW1tD4mTztbxeLy82DCIMHyJSguFDREowfIhICYYPBSw6OhqJiYkhd81MdHS0391WSC2GDwWsX79+GDhwoOoyAtavXz/ExsaqLoO+wfChLklPT4fP51Ndxi0TEfz4xz9GRESE6lLoGwwf6pL09PSQmmoXEaSnp9/0lsykBsOHumTkyJGIi4sLmfM+YWFh+PnPf666DLpGQOGzdu1ajBgxAiaTCSaTCXa7HcXFxVp7a2sr8vPzERsbi+joaEybNk27FXKnmpoa5OTkoH///oiLi8OiRYt4yXsIMplMyMzMDInwEREMGTIEKSkpqkuhawQUPoMHD8aKFStQXl6OI0eO4G//9m/xyCOPoKqqCgDwwgsvYNu2bdiyZQv27NmDc+fOYerUqdrjvV4vcnJy0NbWhgMHDuCdd97B+vXrsXTp0p7dKup1Op0OWVlZqsu4JSKC++67D/369VNdCl2j2zcNjImJwapVq/D4449j0KBB2LhxIx5//HEAwBdffIFhw4bB4XBg/PjxKC4uxkMPPYRz587BarUCANatW4fFixfjwoULt3wykDcNDA6tra341a9+FdTf6yMiGDBgADZs2IDExETV5fzg9clNA71eLzZv3oyWlhbY7XaUl5ejvb3d769hamoqkpKStO/PdTgcGD58uBY8AJCdnQ23260dPd2Mx+OB2+32W0g9o9Go3SY7WIkIHn74YfzoRz9SXQpdJ+DwqaysRHR0NIxGI+bOnYsPPvgAaWlpcDqdiIiIgMVi8etvtVrhdDoBAE6n0y94Ots7275NUVERzGaztvAvWHDQ6XSYOHEiJk6cGJTT7iKCwYMHIzc3l7dNDkIB/4/cfffdqKioQFlZGfLy8jBz5kwcP368N2rTFBYWwuVyaUttbW2vvh7dun79+mHBggWIjY0NqpPPIoKwsDDMnz8f8fHxqsuhmwg4fCIiIvCTn/wEY8aMQVFREUaOHIl///d/h81mQ1tbGxobG/3619XVwWazAQBsNtsNs1+dP3f2uRmj0ajNsHUuFDzuuusuzJ07N+iuofnlL3/J6fUg1u1jUZ/PB4/HgzFjxiA8PBylpaVaW3V1NWpqamC32wEAdrsdlZWVqK+v1/qUlJTAZDIhLS2tu6WQIjqdDlOnTsUDDzwQFEc/IoKhQ4diwYIFQReI9FcBfQlvYWEhHnzwQSQlJaGpqQkbN27EJ598gl27dsFsNmP27NkoKChATEwMTCYT5s+fD7vdjvHjxwMApkyZgrS0NMyYMQMrV66E0+nEkiVLkJ+fD6PR2CsbSH1Dr9dj0aJFuHz5MkpLS5XNfnV+jGLlypU8Qg5yAYVPfX09nn76aZw/fx5msxkjRozArl27cP/99wMAXnvtNej1ekybNg0ejwfZ2dl44403tMeHhYVh+/btyMvLg91uR1RUFGbOnIlXXnmlZ7eKlIiOjsayZcsAQDsC7qsQ6jzi+vGPf4x/+Zd/QVJSUp+8LnVdt6/zUYHX+QS35uZmrF69Glu3bkV7e3uvB1Dnr/D48ePx4osvMngUCmTfDK17n1BIiI6Oxosvvoi0tDSsXbsW9fX1vTbVLSIwGo148sknMWfOHERGRvbK61DPY/hQr9Dr9XjkkUcwatQovP766/j4448hIj12FCQiEBHcfffdWLRoEUaPHh20V1nTzfFtF/W6trY2HD58GOvXr8fRo0fh8/mg0+kCDovOX1Wfz4e77roLTz31FO6//35ER0czeIIE33ZRUImIiMCECRMwduxYVFVVYffu3di3bx9qa2vh9Xq1t2TXB0hn2IgIfD4f4uLikJmZifvvvx+jR49m6IQ4Hvn0sM4dpXNYdTodwsLCFFcVXEQEra2tOHHiBCorK3Hq1Cl89dVXOHPmjF8/s9mM9PR0xMbGYty4cUhNTcWAAQP4UYkgxiMfBXw+H2pravDZgQP48759aP3mSu8BCQm4+777kD5uHKw2G/9S42ogR0ZGYuTIkRg5ciQAoL29/YbvddLr9YiIiOCY/UAxfLpJRODxePDhm2+idscOxHR0ID4sDPpvdhjvpUs4/cc/oqJ/f4x8+mn8fOpUGAwc9uuFh4fzauTbDI9fu0FEUO904j8KCnB52zYkAYg2GLTgAYAwnQ6W8HAktbXhL7//Pf7rlVfQ3NSkrmiiIMHw6Ya2tjb8v5UrMfDUKUR9z3kInU4HC4BwhwNb33oLXq+3T2okClYMny4SEXz4+9+j/7FjCAvgnIRRp8PlXbuwr7g4KD6ESaQKw6eLLl68iNM7dnzvEc/1dDodzCI48v77aGtr66XqiIIfw6cLRATHjhyB5cqVLj1ep9Mh7OxZ/OXPf+7hyohCB8Oniz7fswembsxaDdTp8Kf9+3uwIqLQwvDpIk9LS7cGL0ynQ2tLS4/VQxRqGD5EpATDp4usQ4fC043p8mavF4OHDu3BiohCC8OnC3Q6HUbcdx8auvEcrv79kZ6R0WM1EYUahk8X3ZWSAu/gwV26VqfD54NpxAjEDhzYC5URhQaGTxdFRkYie8EC1Ad4/28RwaW4ODw0dy4/7U63NYZPF+l0OqSNGoXkv/s7fH3NV2h8F58ILhoMGPvss0jgXVfpNsfw6QadTofsJ59EyoIFuBAVhY7vuGVwm9eLSzYbMpcswbhJk/g1EXTb43c7dJPBYMDPcnLwo7vuwu4NG+A+dgzm5mZ0fjnEFQCXY2IwcMwYPDpjBuJ/9CMGDxEASDcUFRUJAFmwYIG27sqVK/Lcc89JTEyMREVFydSpU8XpdPo97quvvpJf/OIXEhkZKYMGDZLf/OY30t7efsuv63K5BIC4XK7ulN+jfD6f+Hw+uXjxouzZtUu2v/eebH/vPSnbt08aGxvF5/OpLpGo1wWyb3b5yOfw4cP43e9+hxEjRvitf+GFF7Bjxw5s2bIFZrMZ8+bNw9SpU/Hpp58CALxeL3JycmCz2XDgwAGcP38eTz/9NMLDw7F8+fJuxKhanUczsbGxuG/KFMXVEIWArqRbU1OTpKSkSElJiUyaNEk78mlsbJTw8HDZsmWL1vfzzz8XAOJwOEREZOfOnaLX6/2OhtauXSsmk0k8Hs8tvX4wHvkQUWD7ZpdOOOfn5yMnJwdZWVl+68vLy9He3u63PjU1FUlJSXA4HAAAh8OB4cOHw2q1an2ys7PhdrtRVVV109fzeDxwu91+CxGFtoDfdm3evBlHjx7F4cOHb2hzOp2IiIiAxWLxW2+1WuF0OrU+1wZPZ3tn280UFRXh5ZdfDrRUIgpiAR351NbWYsGCBXj33XfRL8CL67qjsLAQLpdLW2pra/vstYmodwQUPuXl5aivr8fo0aNhMBhgMBiwZ88erF69GgaDAVarFW1tbWj85rYxnerq6mCz2QAANpsNdXV1N7R3tt2M0WiEyWTyW4gotAUUPpMnT0ZlZSUqKiq0JSMjA7m5udq/w8PDUVpaqj2muroaNTU1sNvtAAC73Y7KykrU19drfUpKSmAymZCWltZDm0VEwS6gcz4DBgxAenq637qoqCjExsZq62fPno2CggLExMTAZDJh/vz5sNvtGD9+PABgypQpSEtLw4wZM7By5Uo4nU4sWbIE+fn5MBqNPbRZRBTsevwK59deew16vR7Tpk2Dx+NBdnY23njjDa09LCwM27dvR15eHux2O6KiojBz5ky88sorPV0KEQUx3qudiHpMIPsmP1hKREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUCCh8/umf/gk6nc5vSU1N1dpbW1uRn5+P2NhYREdHY9q0aTfcGrmmpgY5OTno378/4uLisGjRInR0dPTM1hBRyAj4poH33HMP/u///u+vT2D461O88MIL2LFjB7Zs2QKz2Yx58+Zh6tSp+PTTTwEAXq8XOTk5sNlsOHDgAM6fP4+nn34a4eHhWL58eQ9sDhGFDAnAsmXLZOTIkTdta2xslPDwcNmyZYu27vPPPxcA4nA4RERk586dotfrxel0an3Wrl0rJpNJPB7PLdfhcrkEgLhcrkDKJ6JeFsi+GfA5n5MnTyIhIQF33XUXcnNzUVNTAwAoLy9He3s7srKytL6pqalISkqCw+EAADgcDgwfPhxWq1Xrk52dDbfbjaqqqm99TY/HA7fb7bcQUWgLKHwyMzOxfv16fPTRR1i7di1Onz6Nv/mbv0FTUxOcTiciIiJgsVj8HmO1WuF0OgEATqfTL3g62zvbvk1RURHMZrO2JCYmBlI2EQWhgM75PPjgg9q/R4wYgczMTAwZMgTvv/8+IiMje7y4ToWFhSgoKNB+drvdDCCiENetqXaLxYKhQ4fi1KlTsNlsaGtrQ2Njo1+furo62Gw2AIDNZrth9qvz584+N2M0GmEymfwWIgpt3Qqf5uZm/PnPf0Z8fDzGjBmD8PBwlJaWau3V1dWoqamB3W4HANjtdlRWVqK+vl7rU1JSApPJhLS0tO6UQkQhJqC3Xb/5zW/w8MMPY8iQITh37hyWLVuGsLAwTJ8+HWazGbNnz0ZBQQFiYmJgMpkwf/582O12jB8/HgAwZcoUpKWlYcaMGVi5ciWcTieWLFmC/Px8GI3GXtlAIgpOAYXPmTNnMH36dFy6dAmDBg3CxIkTcfDgQQwaNAgA8Nprr0Gv12PatGnweDzIzs7GG2+8oT0+LCwM27dvR15eHux2O6KiojBz5ky88sorPbtVRBT0dCIiqosIlNvthtlshsvl4vkfoiASyL7Jz3YRkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlAg6fs2fP4qmnnkJsbCwiIyMxfPhwHDlyRGsXESxduhTx8fGIjIxEVlYWTp486fccDQ0NyM3NhclkgsViwezZs9Hc3Nz9rSGikBFQ+Hz99deYMGECwsPDUVxcjOPHj+Nf//Vfcccdd2h9Vq5cidWrV2PdunUoKytDVFQUsrOz0draqvXJzc1FVVUVSkpKsH37duzduxdz5szpua0iouAnAVi8eLFMnDjxW9t9Pp/YbDZZtWqVtq6xsVGMRqNs2rRJRESOHz8uAOTw4cNan+LiYtHpdHL27NlbqsPlcgkAcblcgZRPRL0skH0zoCOfDz/8EBkZGXjiiScQFxeHUaNG4a233tLaT58+DafTiaysLG2d2WxGZmYmHA4HAMDhcMBisSAjI0Prk5WVBb1ej7Kyspu+rsfjgdvt9luIKLQFFD5ffvkl1q5di5SUFOzatQt5eXl4/vnn8c477wAAnE4nAMBqtfo9zmq1am1OpxNxcXF+7QaDATExMVqf6xUVFcFsNmtLYmJiIGUTURAKKHx8Ph9Gjx6N5cuXY9SoUZgzZw6effZZrFu3rrfqAwAUFhbC5XJpS21tba++HhH1voDCJz4+HmlpaX7rhg0bhpqaGgCAzWYDANTV1fn1qaur09psNhvq6+v92js6OtDQ0KD1uZ7RaITJZPJbiCi0BRQ+EyZMQHV1td+6EydOYMiQIQCA5ORk2Gw2lJaWau1utxtlZWWw2+0AALvdjsbGRpSXl2t9du/eDZ/Ph8zMzC5vCBGFmEDOZB86dEgMBoO8+uqrcvLkSXn33Xelf//+smHDBq3PihUrxGKxyNatW+Wzzz6TRx55RJKTk+XKlStanwceeEBGjRolZWVlsn//fklJSZHp06f3yhl1Iuo7geybAYWPiMi2bdskPT1djEajpKamyptvvunX7vP55KWXXhKr1SpGo1EmT54s1dXVfn0uXbok06dPl+joaDGZTDJr1ixpamq65RoYPkTBKZB9UyciovbYK3Butxtmsxkul4vnf4iCSCD7Jj/bRURKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKGFQX0BWdN1l1u92KKyGia3Xuk7dyI+SQDJ9Lly4BABITExVXQkQ309TUBLPZ/J19QjJ8YmJiAAA1NTXfu4E/ZG63G4mJiaitrb2t71nPcbgqGMZBRNDU1ISEhITv7RuS4aPXXz1VZTabb+tftk4mk4njAI5DJ9XjcKsHBDzhTERKMHyISImQDB+j0Yhly5bBaDSqLkUpjsNVHIerQm0cdHIrc2JERD0sJI98iCj0MXyISAmGDxEpwfAhIiVCMnzWrFmDO++8E/369UNmZiYOHTqkuqQeU1RUhLFjx2LAgAGIi4vDo48+iurqar8+ra2tyM/PR2xsLKKjozFt2jTU1dX59ampqUFOTg769++PuLg4LFq0CB0dHX25KT1qxYoV0Ol0WLhwobbudhmHs2fP4qmnnkJsbCwiIyMxfPhwHDlyRGsXESxduhTx8fGIjIxEVlYWTp486fccDQ0NyM3NhclkgsViwezZs9Hc3NzXm+JPQszmzZslIiJC/vM//1Oqqqrk2WefFYvFInV1dapL6xHZ2dny9ttvy7Fjx6SiokJ+8YtfSFJSkjQ3N2t95s6dK4mJiVJaWipHjhyR8ePHy09/+lOtvaOjQ9LT0yUrK0v++Mc/ys6dO2XgwIFSWFioYpO67dChQ3LnnXfKiBEjZMGCBdr622EcGhoaZMiQIfLMM89IWVmZfPnll7Jr1y45deqU1mfFihViNpvlD3/4g/zpT3+SX/7yl5KcnCxXrlzR+jzwwAMycuRIOXjwoOzbt09+8pOfyPTp01VskibkwmfcuHGSn5+v/ez1eiUhIUGKiooUVtV76uvrBYDs2bNHREQaGxslPDxctmzZovX5/PPPBYA4HA4REdm5c6fo9XpxOp1an7Vr14rJZBKPx9O3G9BNTU1NkpKSIiUlJTJp0iQtfG6XcVi8eLFMnDjxW9t9Pp/YbDZZtWqVtq6xsVGMRqNs2rRJRESOHz8uAOTw4cNan+LiYtHpdHL27NneK/57hNTbrra2NpSXlyMrK0tbp9frkZWVBYfDobCy3uNyuQD89cO05eXlaG9v9xuD1NRUJCUlaWPgcDgwfPhwWK1WrU92djbcbjeqqqr6sPruy8/PR05Ojt/2ArfPOHz44YfIyMjAE088gbi4OIwaNQpvvfWW1n769Gk4nU6/cTCbzcjMzPQbB4vFgoyMDK1PVlYW9Ho9ysrK+m5jrhNS4XPx4kV4vV6/XyYAsFqtcDqdiqrqPT6fDwsXLsSECROQnp4OAHA6nYiIiIDFYvHre+0YOJ3Om45RZ1uo2Lx5M44ePYqioqIb2m6Xcfjyyy+xdu1apKSkYNeuXcjLy8Pzzz+Pd955B8Bft+O79gmn04m4uDi/doPBgJiYGKXjEJKfar9d5Ofn49ixY9i/f7/qUvpcbW0tFixYgJKSEvTr1091Ocr4fD5kZGRg+fLlAIBRo0bh2LFjWLduHWbOnKm4uu4JqSOfgQMHIiws7IYZjbq6OthsNkVV9Y558+Zh+/bt+PjjjzF48GBtvc1mQ1tbGxobG/36XzsGNpvtpmPU2RYKysvLUV9fj9GjR8NgMMBgMGDPnj1YvXo1DAYDrFbrbTEO8fHxSEtL81s3bNgw1NTUAPjrdnzXPmGz2VBfX+/X3tHRgYaGBqXjEFLhExERgTFjxqC0tFRb5/P5UFpaCrvdrrCyniMimDdvHj744APs3r0bycnJfu1jxoxBeHi43xhUV1ejpqZGGwO73Y7Kykq/X7iSkhKYTKYbfpGD1eTJk1FZWYmKigptycjIQG5urvbv22EcJkyYcMOlFidOnMCQIUMAAMnJybDZbH7j4Ha7UVZW5jcOjY2NKC8v1/rs3r0bPp8PmZmZfbAV30LZqe4u2rx5sxiNRlm/fr0cP35c5syZIxaLxW9GI5Tl5eWJ2WyWTz75RM6fP68tly9f1vrMnTtXkpKSZPfu3XLkyBGx2+1it9u19s4p5ilTpkhFRYV89NFHMmjQoJCaYr6Za2e7RG6PcTh06JAYDAZ59dVX5eTJk/Luu+9K//79ZcOGDVqfFStWiMVika1bt8pnn30mjzzyyE2n2keNGiVlZWWyf/9+SUlJ4VR7V7z++uuSlJQkERERMm7cODl48KDqknoMgJsub7/9ttbnypUr8txzz8kdd9wh/fv3l8cee0zOnz/v9zx/+ctf5MEHH5TIyEgZOHCg/PrXv5b29vY+3pqedX343C7jsG3bNklPTxej0Sipqany5ptv+rX7fD556aWXxGq1itFolMmTJ0t1dbVfn0uXLsn06dMlOjpaTCaTzJo1S5qamvpyM27Ar9QgIiVC6pwPEf1wMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIl/j9RivXCaHJZ3QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import gym\n",
    "\n",
    "\n",
    "#定义环境\n",
    "class MyWrapper(gym.Wrapper):\n",
    "\n",
    "    def __init__(self):\n",
    "        from pettingzoo.mpe import simple_tag_v3\n",
    "        env = simple_tag_v3.env(num_good=1,\n",
    "                                num_adversaries=1,\n",
    "                                num_obstacles=1,\n",
    "                                max_cycles=1e8,\n",
    "                                render_mode='rgb_array')\n",
    "        super().__init__(env)\n",
    "        self.env = env\n",
    "        self.step_n = 0\n",
    "\n",
    "    def reset(self):\n",
    "        self.env.reset()\n",
    "        self.step_n = 0\n",
    "        return self.state()\n",
    "\n",
    "    def state(self):\n",
    "        state = []\n",
    "        for i in self.env.agents:\n",
    "            state.append(env.observe(i).tolist())\n",
    "        state[-1].extend([0.0, 0.0])\n",
    "        return state\n",
    "\n",
    "    def step(self, action):\n",
    "        reward_sum = [0, 0]\n",
    "        for i in range(5):\n",
    "            if i != 0:\n",
    "                action = [-1, -1]\n",
    "            next_state, reward, over = self._step(action)\n",
    "            for j in range(2):\n",
    "                reward_sum[j] += reward[j]\n",
    "            self.step_n -= 1\n",
    "\n",
    "        self.step_n += 1\n",
    "\n",
    "        return next_state, reward_sum, over\n",
    "\n",
    "    def _step(self, action):\n",
    "        for i, _ in enumerate(env.agent_iter(2)):\n",
    "            self.env.step(action[i] + 1)\n",
    "\n",
    "        reward = [self.env.rewards[i] for i in self.env.agents]\n",
    "\n",
    "        _, _, termination, truncation, _ = env.last()\n",
    "        over = termination or truncation\n",
    "\n",
    "        #限制最大步数\n",
    "        self.step_n += 1\n",
    "        if self.step_n >= 100:\n",
    "            over = True\n",
    "\n",
    "        return self.state(), reward, over\n",
    "\n",
    "    #打印游戏图像\n",
    "    def show(self):\n",
    "        from matplotlib import pyplot as plt\n",
    "        plt.figure(figsize=(3, 3))\n",
    "        plt.imshow(self.env.render())\n",
    "        plt.show()\n",
    "\n",
    "\n",
    "env = MyWrapper()\n",
    "env.reset()\n",
    "\n",
    "env.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<__main__.A2C at 0x7f8abff8e9d0>, <__main__.A2C at 0x7f8abff265b0>]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "\n",
    "class A2C:\n",
    "\n",
    "    def __init__(self, model_actor, model_critic, model_critic_delay,\n",
    "                 optimizer_actor, optimizer_critic):\n",
    "        self.model_actor = model_actor\n",
    "        self.model_critic = model_critic\n",
    "        self.model_critic_delay = model_critic_delay\n",
    "        self.optimizer_actor = optimizer_actor\n",
    "        self.optimizer_critic = optimizer_critic\n",
    "\n",
    "        self.model_critic_delay.load_state_dict(self.model_critic.state_dict())\n",
    "        self.requires_grad(self.model_critic_delay, False)\n",
    "\n",
    "    def soft_update(self, _from, _to):\n",
    "        for _from, _to in zip(_from.parameters(), _to.parameters()):\n",
    "            value = _to.data * 0.99 + _from.data * 0.01\n",
    "            _to.data.copy_(value)\n",
    "\n",
    "    def requires_grad(self, model, value):\n",
    "        for param in model.parameters():\n",
    "            param.requires_grad_(value)\n",
    "\n",
    "    def train_critic(self, state, reward, next_state, over):\n",
    "        self.requires_grad(self.model_critic, True)\n",
    "        self.requires_grad(self.model_actor, False)\n",
    "\n",
    "        #计算values和targets\n",
    "        value = self.model_critic(state)\n",
    "\n",
    "        with torch.no_grad():\n",
    "            target = self.model_critic_delay(next_state)\n",
    "        target = target * 0.99 * (1 - over) + reward\n",
    "\n",
    "        #时序差分误差,也就是tdloss\n",
    "        loss = torch.nn.functional.mse_loss(value, target)\n",
    "\n",
    "        loss.backward()\n",
    "        self.optimizer_critic.step()\n",
    "        self.optimizer_critic.zero_grad()\n",
    "        self.soft_update(self.model_critic, self.model_critic_delay)\n",
    "\n",
    "        #减去value相当于去基线\n",
    "        return (target - value).detach()\n",
    "\n",
    "    def train_actor(self, state, action, value):\n",
    "        self.requires_grad(self.model_critic, False)\n",
    "        self.requires_grad(self.model_actor, True)\n",
    "\n",
    "        #重新计算动作的概率\n",
    "        prob = self.model_actor(state)\n",
    "        prob = prob.gather(dim=1, index=action)\n",
    "\n",
    "        #根据策略梯度算法的导函数实现\n",
    "        #函数中的Q(state,action),这里使用critic模型估算\n",
    "        prob = (prob + 1e-8).log() * value\n",
    "        loss = -prob.mean()\n",
    "\n",
    "        loss.backward()\n",
    "        self.optimizer_actor.step()\n",
    "        self.optimizer_actor.zero_grad()\n",
    "\n",
    "        return loss.item()\n",
    "\n",
    "\n",
    "model_actor = [\n",
    "    torch.nn.Sequential(\n",
    "        torch.nn.Linear(10, 64),\n",
    "        torch.nn.ReLU(),\n",
    "        torch.nn.Linear(64, 64),\n",
    "        torch.nn.ReLU(),\n",
    "        torch.nn.Linear(64, 4),\n",
    "        torch.nn.Softmax(dim=1),\n",
    "    ) for _ in range(2)\n",
    "]\n",
    "\n",
    "model_critic = [\n",
    "    torch.nn.Sequential(\n",
    "        torch.nn.Linear(20, 64),\n",
    "        torch.nn.ReLU(),\n",
    "        torch.nn.Linear(64, 64),\n",
    "        torch.nn.ReLU(),\n",
    "        torch.nn.Linear(64, 1),\n",
    "    ) for _ in range(2)\n",
    "]\n",
    "\n",
    "model_critic_delay = [\n",
    "    torch.nn.Sequential(\n",
    "        torch.nn.Linear(20, 64),\n",
    "        torch.nn.ReLU(),\n",
    "        torch.nn.Linear(64, 64),\n",
    "        torch.nn.ReLU(),\n",
    "        torch.nn.Linear(64, 1),\n",
    "    ) for _ in range(2)\n",
    "]\n",
    "\n",
    "optimizer_actor = [\n",
    "    torch.optim.Adam(model_actor[i].parameters(), lr=1e-3) for i in range(2)\n",
    "]\n",
    "\n",
    "optimizer_critic = [\n",
    "    torch.optim.Adam(model_critic[i].parameters(), lr=5e-3) for i in range(2)\n",
    "]\n",
    "\n",
    "a2c = [\n",
    "    A2C(model_actor[i], model_critic[i], model_critic_delay[i],\n",
    "        optimizer_actor[i], optimizer_critic[i]) for i in range(2)\n",
    "]\n",
    "\n",
    "model_actor = None\n",
    "model_critic = None\n",
    "model_critic_delay = None\n",
    "optimizer_actor = None\n",
    "optimizer_critic = None\n",
    "\n",
    "a2c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.0, -50.48593521118164]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython import display\n",
    "import random\n",
    "\n",
    "\n",
    "#玩一局游戏并记录数据\n",
    "def play(show=False):\n",
    "    state = []\n",
    "    action = []\n",
    "    reward = []\n",
    "    next_state = []\n",
    "    over = []\n",
    "\n",
    "    s = env.reset()\n",
    "    o = False\n",
    "    while not o:\n",
    "        a = []\n",
    "        for i in range(2):\n",
    "            #计算动作\n",
    "            prob = a2c[i].model_actor(torch.FloatTensor(s[i]).reshape(\n",
    "                1, -1))[0].tolist()\n",
    "            a.append(random.choices(range(4), weights=prob, k=1)[0])\n",
    "\n",
    "        #执行动作\n",
    "        ns, r, o = env.step(a)\n",
    "\n",
    "        state.append(s)\n",
    "        action.append(a)\n",
    "        reward.append(r)\n",
    "        next_state.append(ns)\n",
    "        over.append(o)\n",
    "\n",
    "        s = ns\n",
    "\n",
    "        if show:\n",
    "            display.clear_output(wait=True)\n",
    "            env.show()\n",
    "\n",
    "    state = torch.FloatTensor(state)\n",
    "    action = torch.LongTensor(action).unsqueeze(-1)\n",
    "    reward = torch.FloatTensor(reward).unsqueeze(-1)\n",
    "    next_state = torch.FloatTensor(next_state)\n",
    "    over = torch.LongTensor(over).reshape(-1, 1)\n",
    "\n",
    "    return state, action, reward, next_state, over, reward.sum(\n",
    "        dim=0).flatten().tolist()\n",
    "\n",
    "\n",
    "state, action, reward, next_state, over, reward_sum = play()\n",
    "\n",
    "reward_sum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 -1.4495474100112915 [1.5, -678.4044189453125]\n",
      "2500 -0.3488815426826477 [215.0, -230.8831787109375]\n",
      "5000 -0.4887382388114929 [109.0, -120.56380462646484]\n",
      "7500 -0.9196931719779968 [62.5, -70.44122314453125]\n",
      "10000 -0.17468784749507904 [28.0, -46.351417541503906]\n",
      "12500 0.04246482998132706 [16.5, -30.41681480407715]\n",
      "15000 -0.06894251704216003 [30.5, -43.18037414550781]\n",
      "17500 -0.12670961022377014 [10.5, -14.714288711547852]\n",
      "20000 0.0034601313527673483 [20.5, -22.514062881469727]\n",
      "22500 0.07575763016939163 [9.0, -11.618470191955566]\n",
      "25000 -0.047059591859579086 [6.0, -9.934822082519531]\n",
      "27500 -0.06758452951908112 [10.5, -11.698257446289062]\n",
      "30000 0.06456369161605835 [12.0, -14.611892700195312]\n",
      "32500 -0.35115692019462585 [10.0, -11.417187690734863]\n",
      "35000 0.08109088242053986 [6.5, -8.675280570983887]\n",
      "37500 -0.12685692310333252 [3.5, -12.425094604492188]\n",
      "40000 0.004275428131222725 [11.0, -11.277917861938477]\n",
      "42500 -0.018033353611826897 [4.0, -14.036775588989258]\n",
      "45000 -0.051232386380434036 [7.0, -14.584927558898926]\n",
      "47500 0.014684038236737251 [8.5, -9.005228042602539]\n"
     ]
    }
   ],
   "source": [
    "def train():\n",
    "    #训练N局\n",
    "    for epoch in range(5_0000):\n",
    "        state, action, reward, next_state, over, _ = play()\n",
    "\n",
    "        #合并部分字段\n",
    "        state_c = state.flatten(start_dim=1)\n",
    "        next_state_c = next_state.flatten(start_dim=1)\n",
    "\n",
    "        for i in range(2):\n",
    "            value = a2c[i].train_critic(state_c, reward[:, i], next_state_c,\n",
    "                                        over)\n",
    "            loss = a2c[i].train_actor(state[:, i], action[:, i], value)\n",
    "\n",
    "        if epoch % 2500 == 0:\n",
    "            test_result = [play()[-1] for _ in range(20)]\n",
    "            test_result = torch.FloatTensor(test_result).mean(dim=0).tolist()\n",
    "            print(epoch, loss, test_result)\n",
    "\n",
    "\n",
    "train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEYCAYAAABlUvL1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAloklEQVR4nO3dfXRTdZ4/8PdN06QtJQltaVKGFvqbQUoFpLZQIuxxHTIUto48dHzgILIOK8dOYMSO4HRH68PsWBaPusOMwsg4wFEQh9mDCsvDdIuCQihQ7FhaKHVBW4G0Qk1SoE3b5Pv7g+kdI6iEpv0m9P06555D7veT5HOvvW9v7s3NVYQQAkREfUwjuwEi6p8YPkQkBcOHiKRg+BCRFAwfIpKC4UNEUjB8iEgKhg8RScHwISIpGD5EJIW08Hn55ZcxfPhwxMTEIDc3FwcPHpTVChFJICV83nrrLRQVFeGpp57CkSNHcMsttyAvLw/Nzc0y2iEiCRQZF5bm5uZi/Pjx+P3vfw8A8Pv9SE1NxeLFi/HLX/6yr9shIgm0ff2GHR0dqKysRHFxsTpPo9HAZrPB4XBc9Tlerxder1d97Pf70dLSgsTERCiK0us9E9G1EUKgtbUVQ4YMgUbz7R+s+jx8zp07B5/PB7PZHDDfbDbj+PHjV31OaWkpnnnmmb5oj4hCoLGxEUOHDv3Wmj4Pn+tRXFyMoqIi9bHb7UZaWhoaGxthMBgkdkZEX+XxeJCamoqBAwd+Z22fh09SUhKioqLQ1NQUML+pqQkWi+Wqz9Hr9dDr9VfMNxgMDB+iMHQth0P6/GyXTqdDdnY2ysvL1Xl+vx/l5eWwWq193Q4RSSLlY1dRURHmz5+PnJwcTJgwAf/1X/+Fixcv4sEHH5TRDhFJICV87r33XnzxxRcoKSmB0+nEuHHjsHPnzisOQhPR5TNI7e3taP6iGRBAQkIC4uPjI/5Mr5Tv+fSUx+OB0WiE2+3mMR+6obW1tWHvob14q/ItfNr1KQDArDGj4OYC5E3KC7sQCmbbZPgQhamLFy/iP//yn9jXtg9Rg6KgaC6HjBACPo8Pmb5MPHX3U0gcFD7fdwtm2+SFpURhyO/3Y807a+DQOKBN1KrBA1w+k6Q1anF84HE8v/l5+Hw+iZ1eP4YPURj6/MznKDtbBkX/zXs0mmgNKr2V+PjYx4jADzAMH6JwtPWDrfAme7/z45SSrOAvH/6lj7oKLYYPURhyXXRBE/3dm6cmSgN3m7sPOgo9hg9RGIqPiYff5//OOuETGKAf0AcdhR7DhygM3TnpTmibtN95LMd3zodZE2f1UVehxfAhCkPpw9Jxe+LtEJ3fHD7CJzBaGY3ssdlhc6o9GAwfojCk0WhQOLMQY9vGosvTFbAHJIRA18UupLakYlnBMkRroyV2ev34JUOiMOZp9WDHBzuw5dgWOKOcAIBBXYOQ///yMfP2mUhMCJ8vGAL8hjPRDUUIAU+rB42fN0JAICU5JWx/xTOYbTMifkyMqD9TFAVGgxHGTKPsVkKKx3yISAqGDxFJwfAhIikYPkQkBcOHiKRg+BCRFAwfIpKC4UNEUjB8iEgKhg8RSRF0+Ozduxc//vGPMWTIECiKgrfffjtgXAiBkpISpKSkIDY2FjabDfX19QE1LS0tmDt3LgwGA0wmExYsWIALFy70aEGIKLIEHT4XL17ELbfcgpdffvmq4ytWrMDKlSuxevVqVFRUYMCAAcjLy0N7e7taM3fuXNTU1KCsrAzbtm3D3r17sXDhwutfCiKKPKIHAIgtW7aoj/1+v7BYLOL5559X57lcLqHX68Wbb74phBCitrZWABCHDh1Sa3bs2CEURRGnT5++pvd1u90CgHC73T1pn4hCLJhtM6RXtZ86dQpOpxM2m02dZzQakZubC4fDgfvuuw8OhwMmkwk5OTlqjc1mg0ajQUVFBWbNuvInIb1eL7xer/rY4/GEsm3qx8Tff1Hm/LlzqDlyBG2XLgEAvjd8OL4/ciRiY2PD8qcrbgQhDR+n8/KPHX39nutms1kdczqdSE5ODmxCq0VCQoJa83WlpaV45plnQtkqEYQQOPrRR/jgrbdw4ehRGNvaoP17GJ3QaLDTbMbQ227DtDlzYEpIYAiFWESc7SouLobb7VanxsZG2S1RhOvo6MBf//u/sbekBAlHjiC9qwsJ0dEw6HQw6HRI0Wox/Px5dL79Nv785JM4UVsbkTfmC2chDR+LxQIAaGpqCpjf1NSkjlksFjQ3NweMd3V1oaWlRa35Or1eD4PBEDARXS+/349316xB4+rVGNzRgSjNN28GsVFRMNTXo+xXv0Ld0aMMoBAKafikp6fDYrGgvLxcnefxeFBRUQGr1QoAsFqtcLlcqKysVGt2794Nv9+P3NzcULZDdAUhBD7atw9N27bBGBV1TR+ltBoNki5exPuvvAJXS0sfdNk/BB0+Fy5cQFVVFaqqqgBcPshcVVWFhoYGKIqCJUuW4D/+4z/w7rvvorq6Gg888ACGDBmCmTNnAgBGjRqFadOm4aGHHsLBgwexb98+LFq0CPfddx+GDBkSymUjuoLL5cJ7v/0tBvt8QT0vSqPBgPp6bH/9de79hEjQB5wPHz6MO+64Q31cVFQEAJg/fz7WrVuHZcuW4eLFi1i4cCFcLhcmT56MnTt3IiYmRn3Ohg0bsGjRIkyZMgUajQYFBQVYuXJlCBaH6Nsdq6rCgJYWKNHB324mJioKnzocuPhv/4b4+Phe6K5/4d0rqN/w+/147dlnEb9vH7Tfcpzn2zg7OzF5+XJkTZgQ4u5uDMFsmxFxtosoFIQQOP/JJ9cdPAAwEEDjJ5+Erql+jOFDRFIwfKhfiY6N7dEBYx8AfWxs6Brqxxg+1G9oNBqkT5qEtiDPdH1Vi06HMTzeExIMH+o3FEXBuMmTcf46j/n4hYDupptgSUkJcWf9E8OH+pVhw4dj0OTJ8Pr9QT1PCIGWqCjcds89vMYrRBg+1K9EabX4SVERzn3ve/AHceznot+PIffei+zbbmP4hAjDh/qd+Ph4zPj3f8dZi+U794CEEDgPQPejH2HqvfdC04PT9BSIa5L6HUVRMHzECPz0979H16RJ+NznQ5vPF3AWzCcEvuzsRENMDEYvWYL7HnsMsTzLFVL8hjP1a11dXThZX4+qDz5Aw/796Pz7j4kZhw1Dxu23Y/T48RicnMyPWtcomG2T4UOEyx+vurq61L2fqKgoREVFSe4q8gSzbYb0lwyJIpWiKIi+jotN6frxmA8RScHwISIpGD5EJAXDh4ikYPgQkRQMHyKSguFDRFIwfIhICoYPEUnB8CEiKYIKn9LSUowfPx4DBw5EcnIyZs6cibq6uoCa9vZ22O12JCYmIj4+HgUFBVfcPrmhoQH5+fmIi4tDcnIyli5diq6urp4vDRFFjKDCZ8+ePbDb7Thw4ADKysrQ2dmJqVOn4uLFi2rNo48+iq1bt2Lz5s3Ys2cPzpw5g9mzZ6vjPp8P+fn56OjowP79+7F+/XqsW7cOJSUloVsqIgp/ogeam5sFALFnzx4hhBAul0tER0eLzZs3qzXHjh0TAITD4RBCCLF9+3ah0WiE0+lUa1atWiUMBoPwer3X9L5ut1sAEG63uyftE1GIBbNt9uiYj9vtBgAkJCQAACorK9HZ2QmbzabWZGRkIC0tDQ6HAwDgcDgwZswYmM1mtSYvLw8ejwc1NTVXfR+v1wuPxxMwEVFku+7w8fv9WLJkCSZNmoTRo0cDAJxOJ3Q6HUwmU0Ct2WyG0+lUa74aPN3j3WNXU1paCqPRqE6pqanX2zYRhYnrDh+73Y6jR49i06ZNoeznqoqLi+F2u9WpsbGx19+TiHrXdf2Y2KJFi7Bt2zbs3bsXQ4cOVedbLBZ0dHTA5XIF7P00NTXBYrGoNQcPHgx4ve6zYd01X6fX66HX66+nVSIKU0Ht+QghsGjRImzZsgW7d+9Genp6wHh2djaio6NRXl6uzqurq0NDQwOsVisAwGq1orq6Gs3NzWpNWVkZDAYDMjMze7IsRBRBgtrzsdvt2LhxI9555x0MHDhQPUZjNBoRGxsLo9GIBQsWoKioCAkJCTAYDFi8eDGsVismTpwIAJg6dSoyMzMxb948rFixAk6nE0888QTsdjv3boj6k2BOowG46rR27Vq1pq2tTfzsZz8TgwYNEnFxcWLWrFni7NmzAa/z6aefiunTp4vY2FiRlJQkfvGLX4jOzs5r7oOn2onCUzDbJu9eQUQhE8y2yWu7iEgKhg8RScHwISIpGD5EJAXDh4ikYPgQkRQMHyKSguFDRFIwfIhICoYPEUnB8CEiKRg+RCQFw4eIpGD4EJEUDB8ikoLhQ0RSMHyISAqGDxFJwfAhIikYPkQkBcOHiKRg+BCRFEGFz6pVqzB27FgYDAYYDAZYrVbs2LFDHW9vb4fdbkdiYiLi4+NRUFCg3gq5W0NDA/Lz8xEXF4fk5GQsXboUXV1doVkaIooYQYXP0KFDsXz5clRWVuLw4cP44Q9/iBkzZqCmpgYA8Oijj2Lr1q3YvHkz9uzZgzNnzmD27Nnq830+H/Lz89HR0YH9+/dj/fr1WLduHUpKSkK7VEQU/np6h8JBgwaJP/7xj8Llcono6GixefNmdezYsWMCgHA4HEIIIbZv3y40Go1wOp1qzapVq4TBYBBer/ea35N3LCUKT8Fsm9d9zMfn82HTpk24ePEirFYrKisr0dnZCZvNptZkZGQgLS0NDocDAOBwODBmzBiYzWa1Ji8vDx6PR917uhqv1wuPxxMwEVFkCzp8qqurER8fD71ej4cffhhbtmxBZmYmnE4ndDodTCZTQL3ZbIbT6QQAOJ3OgODpHu8e+yalpaUwGo3qlJqaGmzbRBRmgg6fkSNHoqqqChUVFSgsLMT8+fNRW1vbG72piouL4Xa71amxsbFX34+Iep822CfodDr84Ac/AABkZ2fj0KFD+O1vf4t7770XHR0dcLlcAXs/TU1NsFgsAACLxYKDBw8GvF732bDumqvR6/XQ6/XBtkpEYazH3/Px+/3wer3Izs5GdHQ0ysvL1bG6ujo0NDTAarUCAKxWK6qrq9Hc3KzWlJWVwWAwIDMzs6etEFEECWrPp7i4GNOnT0daWhpaW1uxceNGvP/++9i1axeMRiMWLFiAoqIiJCQkwGAwYPHixbBarZg4cSIAYOrUqcjMzMS8efOwYsUKOJ1OPPHEE7Db7dyzIepnggqf5uZmPPDAAzh79iyMRiPGjh2LXbt24Uc/+hEA4KWXXoJGo0FBQQG8Xi/y8vLwyiuvqM+PiorCtm3bUFhYCKvVigEDBmD+/Pl49tlnQ7tURBT2FCGEkN1EsDweD4xGI9xuNwwGg+x2iOjvgtk2eW0XEUnB8CEiKRg+RCQFw4eIpGD4EJEUDB8ikoLhQ0RSMHyISAqGDxFJwfAhIimC/kkNonDh8/nQ2dl5xXy9Xg9FUSR0RMFg+FDE6OjowGeffYYTJ07go48+wrlz53Ds2LGAGp1Oh1tvvRUDBw7EbbfdhhEjRiAhIQFRUVGSuqZvwgtLKaz5fD6cPHkSe/fuxXvvvYf6+nq0t7cHhMlX93K6/5yFEPD7/TCZTMjOzobNZsOECROQmJjIvaJeFMy2yfChsOTz+XDixAm8/vrr2L17N9rb26HRaIIOju4/b5/PB4vFgnvuuQczZsxgCPUShg9FtJaWFqxfvx5vvfUWOjo6ACAkQdG9N5SSkoKioiLccccd0Gp55CGUGD4UkYQQOHToEF544QWcOHECiqL0yt6JEAKKomDatGkoKipCYmJiyN+jvwpm22TsU1jo7OzExo0bsXbtWng8Hmg0vfctEEVRIITAjh07cOrUKTz22GPIysrix7A+xu/5kHSdnZ1Ys2YNfve738Hj8fRJCHTvVR0/fhzLli1DZWUlIvBDQERj+JBU3cHzpz/9CX6/v8/3PhRFQUtLC375y1/iyJEjDKA+xPAhaYQQ2Lx5s7Tg6dYdQMuWLcNnn30mpYf+iOFD0hw5cgRr1qyRGjzdFEXBl19+ieeeew4ej0dqL/0Fw4ekcLlcePHFF+F2u6UHTzdFUXDo0CFs3LgRfr9fdjs3vB6Fz/Lly6EoCpYsWaLOa29vh91uR2JiIuLj41FQUKDeErlbQ0MD8vPzERcXh+TkZCxduhRdXV09aYUiiN/vx+uvv47a2tqwCZ5uiqLgjTfewMcffyy7lRvedYfPoUOH8Ic//AFjx44NmP/oo49i69at2Lx5M/bs2YMzZ85g9uzZ6rjP50N+fj46Ojqwf/9+rF+/HuvWrUNJScn1LwVFlFOnTmHTpk1hFzzA5fC5dOkSVq1addWLVil0rit8Lly4gLlz52LNmjUYNGiQOt/tduO1117Diy++iB/+8IfIzs7G2rVrsX//fhw4cAAA8Ne//hW1tbV44403MG7cOEyfPh2//vWv8fLLL6vfZqUbl9/vx4YNG3Dp0qWwDB/gcgBVVlbi8OHDslu5oV1X+NjtduTn58NmswXMr6ysRGdnZ8D8jIwMpKWlweFwAAAcDgfGjBkDs9ms1uTl5cHj8aCmpuaq7+f1euHxeAImikynT5/Grl27wjZ4unV/NOSxn94TdPhs2rQJR44cQWlp6RVjTqcTOp0OJpMpYL7ZbIbT6VRrvho83ePdY1dTWloKo9GoTqmpqcG2TWFACIF9+/aF9V5PN0VRUF1d/Y1/k9RzQYVPY2MjHnnkEWzYsAExMTG91dMViouL4Xa71amxsbHP3ptCx+/3Y/fu3WEfPN0uXLiAAwcO8IuHvSSo8KmsrERzczNuvfVWaLVaaLVa7NmzBytXroRWq4XZbEZHRwdcLlfA85qammCxWAAAFovlirNf3Y+7a75Or9fDYDAETBR5zp49i5qamogJH0VRUF5ezvDpJUGFz5QpU1BdXY2qqip1ysnJwdy5c9V/R0dHo7y8XH1OXV0dGhoaYLVaAQBWqxXV1dVobm5Wa8rKymAwGJCZmRmixaJwVF9fj0uXLslu45opioKTJ0+itbVVdis3pKCuah84cCBGjx4dMG/AgAFITExU5y9YsABFRUVISEiAwWDA4sWLYbVaMXHiRADA1KlTkZmZiXnz5mHFihVwOp144oknYLfbodfrQ7RYFI5qa2tltxC0pqYmnDt3DkajUXYrN5yQ/6TGSy+9BI1Gg4KCAni9XuTl5eGVV15Rx6OiorBt2zYUFhbCarViwIABmD9/Pp599tlQt0JhxO/348yZM7LbCJoQAmfOnMH3v/992a3ccPhjYtQnfD4f7rnnHnz66aeyWwmKz+eD3W7HQw89JLuViBDMtslru4hICoYPEUnB8CEiKRg+1Gfi4uIi8jszAwYMkN3CDYnhQ31Co9FgwoQJERc+Go0G48ePl93GDYnhQ31CUZSIPDOp0+n69FKi/oThQ31mwoQJslsIihACaWlpSElJkd3KDYnhQ30mLS0N3/ve9yLmo5cQArm5uQH3hafQYfhQn4mLi8PkyZMjJnwURcGUKVMi5kLYSMPwoT7TvTFHgu6PXCNHjpTdyg2L4UN96uabb8bIkSPDfu9HCIHp06fzYHMvYvhQn4qNjcW8efPCOnyEEEhKSsLs2bP5kasXMXyoTymKgjvuuAOjRo0K6wC6++67kZSUJLuNGxrDh/pcbGwsHn/8cej1+rALICEERo8ejfvvv597Pb2M4UN9TlEUjBkzBvfff7/sVgIIIdRgjI2Nld3ODY/hQ1JoNBr89Kc/DbtT70uXLkVmZib3evoAw4ekiY2NRVFREYYPHx4WATRr1ixMmzaNwdNHGD4k1bBhw/Diiy9KD6CZM2di6dKlPLXehxg+JJWiKBg2bBheeOEFpKen92kAdb8Xg0cOhg9JpygKhg8fjldffRV33nknAPR6CAkhEBcXh6effhrLli1j8EjA8KGwoCgKEhMT8atf/QolJSVISkrqlfukCyHg9/sxbtw4vPbaa7jzzjt5yyZJQn7rHKKe0Ov1uOuuuzBu3Dj88Y9/RHl5Odra2qDR9Oz/k0IICCFgsVhw33334Z577kFMTAwPLksU1H/Rp59+GoqiBEwZGRnqeHt7O+x2OxITExEfH4+CgoIrbo3c0NCA/Px8xMXFITk5GUuXLkVXV1doloZuCN3HgZ555hmsXbsWd911FwYPHqzutVwrIQR8Ph8URcGIESPwyCOPYOPGjXjggQcQGxvL4JEs6D2fm2++Gf/7v//7jxfQ/uMlHn30UfzP//wPNm/eDKPRiEWLFmH27NnYt28fgMv3QMrPz4fFYsH+/ftx9uxZPPDAA4iOjsZzzz0XgsWhG4lGo8HIkSPx9NNPw+Px4G9/+xvKy8tx7NgxfPbZZ/D5fOjs7LzieXq9HjqdDjfddBMmTJiAf/7nf0Z6ejp0Oh0DJ4wEddPAp59+Gm+//TaqqqquGHO73Rg8eDA2btyIn/zkJwCA48ePY9SoUXA4HJg4cSJ27NiBO++8E2fOnIHZbAYArF69Go8//ji++OIL6HS6a+qDNw3sv4QQ8Hq9cLlcOH/+PI4fPx4wrtPpkJWVBb1ej8TERHUPnfpGMNtm0Hs+9fX1GDJkCGJiYmC1WlFaWoq0tDRUVlais7MTNptNrc3IyEBaWpoaPg6HA2PGjFGDBwDy8vJQWFiImpoaZGVlXfU9vV4vvF5vwAJS/6QoCmJiYmCxWGCxWHDzzTfLbomuU1DHfHJzc7Fu3Trs3LkTq1atwqlTp/BP//RPaG1thdPphE6ng8lkCniO2WyG0+kEADidzoDg6R7vHvsmpaWlMBqN6pSamhpM20QUhoLa85k+fbr677FjxyI3NxfDhg3Dn//85169EK+4uBhFRUXqY4/HwwAiinA9On9pMplw00034ZNPPoHFYkFHRwdcLldATVNTEywWCwDAYrFccfar+3F3zdXo9XoYDIaAiYgiW4/C58KFC/i///s/pKSkIDs7G9HR0SgvL1fH6+rq0NDQAKvVCgCwWq2orq5Gc3OzWlNWVgaDwYDMzMyetEJEESaoj12PPfYYfvzjH2PYsGE4c+YMnnrqKURFRWHOnDkwGo1YsGABioqKkJCQAIPBgMWLF8NqtWLixIkAgKlTpyIzMxPz5s3DihUr4HQ68cQTT8But/NbpkT9TFDh8/nnn2POnDk4f/48Bg8ejMmTJ+PAgQMYPHgwAOCll16CRqNBQUEBvF4v8vLy8Morr6jPj4qKwrZt21BYWAir1YoBAwZg/vz5ePbZZ0O7VEQU9oL6nk+44Pd8iMJTMNsmLywlIikYPkQkBcOHiKRg+BCRFAwfIpKC4UNEUjB8iEgKhg8RScHwISIpGD5EJAXDh4ikYPgQkRQMHyKSguFDRFIwfIhICoYPEUnB8CEiKRg+RCQFw4eIpGD4EJEUDB8ikoLhQ0RSBB0+p0+fxv3334/ExETExsZizJgxOHz4sDouhEBJSQlSUlIQGxsLm82G+vr6gNdoaWnB3LlzYTAYYDKZsGDBAly4cKHnS0NEESOo8Pnyyy8xadIkREdHY8eOHaitrcULL7yAQYMGqTUrVqzAypUrsXr1alRUVGDAgAHIy8tDe3u7WjN37lzU1NSgrKwM27Ztw969e7Fw4cLQLRURhT8RhMcff1xMnjz5G8f9fr+wWCzi+eefV+e5XC6h1+vFm2++KYQQora2VgAQhw4dUmt27NghFEURp0+fvqY+3G63ACDcbncw7RNRLwtm2wxqz+fdd99FTk4O7r77biQnJyMrKwtr1qxRx0+dOgWn0wmbzabOMxqNyM3NhcPhAAA4HA6YTCbk5OSoNTabDRqNBhUVFVd9X6/XC4/HEzARUWQLKnxOnjyJVatWYcSIEdi1axcKCwvx85//HOvXrwcAOJ1OAIDZbA54ntlsVsecTieSk5MDxrVaLRISEtSarystLYXRaFSn1NTUYNomojAUVPj4/X7ceuuteO6555CVlYWFCxfioYcewurVq3urPwBAcXEx3G63OjU2Nvbq+xFR7wsqfFJSUpCZmRkwb9SoUWhoaAAAWCwWAEBTU1NATVNTkzpmsVjQ3NwcMN7V1YWWlha15uv0ej0MBkPARESRLajwmTRpEurq6gLmnThxAsOGDQMApKenw2KxoLy8XB33eDyoqKiA1WoFAFitVrhcLlRWVqo1u3fvht/vR25u7nUvCBFFmGCOZB88eFBotVrxm9/8RtTX14sNGzaIuLg48cYbb6g1y5cvFyaTSbzzzjvi448/FjNmzBDp6emira1NrZk2bZrIysoSFRUV4sMPPxQjRowQc+bM6ZUj6kTUd4LZNoMKHyGE2Lp1qxg9erTQ6/UiIyNDvPrqqwHjfr9fPPnkk8JsNgu9Xi+mTJki6urqAmrOnz8v5syZI+Lj44XBYBAPPvigaG1tveYeGD5E4SmYbVMRQgi5+17B83g8MBqNcLvdPP5DFEaC2TZ5bRcRScHwISIpGD5EJAXDh4ikYPgQkRQMHyKSguFDRFIwfIhICoYPEUnB8CEiKRg+RCQFw4eIpGD4EJEUDB8ikoLhQ0RSMHyISAqGDxFJwfAhIikYPkQkBcOHiKRg+BCRFAwfIpKC4UNEUjB8iEgKhg8RSaGV3cD16L7JqsfjkdwJEX1V9zZ5LTdCjsjwOX/+PAAgNTVVcidEdDWtra0wGo3fWhOR4ZOQkAAAaGho+M4FvJF5PB6kpqaisbGxX9+znuvhsnBYD0IItLa2YsiQId9ZG5Hho9FcPlRlNBr79R9bN4PBwPUArodustfDte4Q8IAzEUnB8CEiKSIyfPR6PZ566ino9XrZrUjF9XAZ18NlkbYeFHEt58SIiEIsIvd8iCjyMXyISAqGDxFJwfAhIikiMnxefvllDB8+HDExMcjNzcXBgwdltxQypaWlGD9+PAYOHIjk5GTMnDkTdXV1ATXt7e2w2+1ITExEfHw8CgoK0NTUFFDT0NCA/Px8xMXFITk5GUuXLkVXV1dfLkpILV++HIqiYMmSJeq8/rIeTp8+jfvvvx+JiYmIjY3FmDFjcPjwYXVcCIGSkhKkpKQgNjYWNpsN9fX1Aa/R0tKCuXPnwmAwwGQyYcGCBbhw4UJfL0ogEWE2bdokdDqd+NOf/iRqamrEQw89JEwmk2hqapLdWkjk5eWJtWvXiqNHj4qqqirxL//yLyItLU1cuHBBrXn44YdFamqqKC8vF4cPHxYTJ04Ut912mzre1dUlRo8eLWw2m/joo4/E9u3bRVJSkiguLpaxSD128OBBMXz4cDF27FjxyCOPqPP7w3poaWkRw4YNE//6r/8qKioqxMmTJ8WuXbvEJ598otYsX75cGI1G8fbbb4u//e1v4q677hLp6emira1NrZk2bZq45ZZbxIEDB8QHH3wgfvCDH4g5c+bIWCRVxIXPhAkThN1uVx/7fD4xZMgQUVpaKrGr3tPc3CwAiD179gghhHC5XCI6Olps3rxZrTl27JgAIBwOhxBCiO3btwuNRiOcTqdas2rVKmEwGITX6+3bBeih1tZWMWLECFFWViZuv/12NXz6y3p4/PHHxeTJk79x3O/3C4vFIp5//nl1nsvlEnq9Xrz55ptCCCFqa2sFAHHo0CG1ZseOHUJRFHH69Onea/47RNTHro6ODlRWVsJms6nzNBoNbDYbHA6HxM56j9vtBvCPi2krKyvR2dkZsA4yMjKQlpamrgOHw4ExY8bAbDarNXl5efB4PKipqenD7nvObrcjPz8/YHmB/rMe3n33XeTk5ODuu+9GcnIysrKysGbNGnX81KlTcDqdAevBaDQiNzc3YD2YTCbk5OSoNTabDRqNBhUVFX23MF8TUeFz7tw5+Hy+gD8mADCbzXA6nZK66j1+vx9LlizBpEmTMHr0aACA0+mETqeDyWQKqP3qOnA6nVddR91jkWLTpk04cuQISktLrxjrL+vh5MmTWLVqFUaMGIFdu3ahsLAQP//5z7F+/XoA/1iOb9smnE4nkpOTA8a1Wi0SEhKkroeIvKq9v7Db7Th69Cg+/PBD2a30ucbGRjzyyCMoKytDTEyM7Hak8fv9yMnJwXPPPQcAyMrKwtGjR7F69WrMnz9fcnc9E1F7PklJSYiKirrijEZTUxMsFoukrnrHokWLsG3bNrz33nsYOnSoOt9isaCjowMulyug/qvrwGKxXHUddY9FgsrKSjQ3N+PWW2+FVquFVqvFnj17sHLlSmi1WpjN5n6xHlJSUpCZmRkwb9SoUWhoaADwj+X4tm3CYrGgubk5YLyrqwstLS1S10NEhY9Op0N2djbKy8vVeX6/H+Xl5bBarRI7Cx0hBBYtWoQtW7Zg9+7dSE9PDxjPzs5GdHR0wDqoq6tDQ0ODug6sViuqq6sD/uDKyspgMBiu+EMOV1OmTEF1dTWqqqrUKScnB3PnzlX/3R/Ww6RJk674qsWJEycwbNgwAEB6ejosFkvAevB4PKioqAhYDy6XC5WVlWrN7t274ff7kZub2wdL8Q2kHeq+Tps2bRJ6vV6sW7dO1NbWioULFwqTyRRwRiOSFRYWCqPRKN5//31x9uxZdbp06ZJa8/DDD4u0tDSxe/ducfjwYWG1WoXValXHu08xT506VVRVVYmdO3eKwYMHR9Qp5qv56tkuIfrHejh48KDQarXiN7/5jaivrxcbNmwQcXFx4o033lBrli9fLkwmk3jnnXfExx9/LGbMmHHVU+1ZWVmioqJCfPjhh2LEiBE81X49fve734m0tDSh0+nEhAkTxIEDB2S3FDIArjqtXbtWrWlraxM/+9nPxKBBg0RcXJyYNWuWOHv2bMDrfPrpp2L69OkiNjZWJCUliV/84heis7Ozj5cmtL4ePv1lPWzdulWMHj1a6PV6kZGRIV599dWAcb/fL5588klhNpuFXq8XU6ZMEXV1dQE158+fF3PmzBHx8fHCYDCIBx98ULS2tvblYlyBP6lBRFJE1DEfIrpxMHyISAqGDxFJwfAhIikYPkQkBcOHiKRg+BCRFAwfIpKC4UNEUjB8iEgKhg8RScHwISIp/j/9eNZ5ZXV90gAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "[0.0, -6.024299621582031]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "play(True)[-1]"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "第9章-策略梯度算法.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python [conda env:cuda117]",
   "language": "python",
   "name": "conda-env-cuda117-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
